* Compile a comprehensive list of ZIP-code coordinates for nursing homes.
* Source 1: semicolon-delimited ZIP lat/lon CSV; column 1 is read as a string.
import delimited "P:\rudpa\shared\holdthendelete\ziplatlon\us-zip-code-latitude-and-longitude.csv", delimiter(";") stringcols(1) clear 
* only the ZIP key and its coordinates are needed downstream
keep zip latitude longitude
rename (zip latitude longitude) (hkzip lat_z5 lon_z5)
* a ZIP without both coordinates is unusable for distance calculations
drop if missing(lat_z5) | missing(lon_z5)
* abort unless hkzip uniquely identifies the remaining rows (required by the 1:1 merge later)
isid hkzip
tempfile temp1
save `temp1'

* Source 2: comma-delimited 2013 ZIP list; column 1 is read as a string.
import delimited "P:\rudpa\shared\holdthendelete\ziplatlon\zips2013.csv", delimiter(",") stringcols(1) clear
rename zip hkzip
rename lat lat_z5
rename lng lon_z5
* Keep only the key and coordinates, as is done for the first CSV source, so that
* unrelated CSV columns are not carried through the merge and the (very large) cross.
keep hkzip lat_z5 lon_z5
drop if mi(lat_z5)|mi(lon_z5)
* abort unless hkzip uniquely identifies the remaining rows (required by the 1:1 merge later)
isid hkzip
tempfile temp2
save `temp2'

* Source 3: zips.dta, restricted to records with ZipYear 2011 or later.
use "P:\rudpa\shared\holdthendelete\ziplatlon\zips.dta", clear
* Stata's missing value sorts above every number, so missing ZipYear is excluded explicitly
drop if ZipYear<2011 | missing(ZipYear)
drop ZipYear
rename (ZipCode Latitude Longitude) (hkzip lat_z5 lon_z5)
* collapse rows that are identical on every remaining variable
duplicates drop
* dup = number of OTHER rows sharing the same hkzip (0 when the ZIP is unique)
duplicates tag hkzip, gen(dup)
* where a ZIP appears exactly twice, discard a copy whose latitude is 0
* NOTE(review): a latitude of 0 is presumably a placeholder value -- confirm
drop if dup==1 & lat_z5==0
drop dup
drop if missing(lat_z5) | missing(lon_z5)
* any ZIP that is still duplicated keeps a single row; force is needed because the copies differ
duplicates drop hkzip, force

* Fold the two CSV-based lists into the data in memory, one after the other.
* A plain 1:1 merge keeps the in-memory coordinates for ZIPs present on both sides,
* so each file only contributes ZIPs that are not already present.
foreach src in temp1 temp2 {
	merge 1:1 hkzip using ``src''
	tab _merge // original author's note on the temp2 merge: this data adds nothing
	drop _merge
}
* total number of ZIPs in the combined list
display _N

* The ZIP list is crossed with the nursing homes in two parts:
* rows numbered below the cutoff go to zips1, all remaining rows go to zips2.
local cutoff 22000
gen n=_n
forvalues part = 1/2 {
	preserve
	if `part'==1 {
		keep if n<`cutoff'
	}
	else {
		keep if n>=`cutoff'
	}
	drop n
	tempfile zips`part'
	save `zips`part''
	restore
}

* Open the nursing-home data. Assumption: if a home's lat/lon is missing, then the
* modal value for that accpt_id over this time period can be used instead.
* Not sure in practice whether that fills in any missing values, but why not.
* NOTE(review): egen mode() returns missing when the mode is not unique unless
* minmode/maxmode is specified -- confirm that is acceptable here.

use "P:\rudpa\shared\holdthendelete\annosc_2018b.dta", clear
* Stata's missing value sorts above every number, so key_year>=2016 alone would let
* records with a missing year through and into the modal-coordinate calculation below.
keep if key_year>=2016 & !mi(key_year)
bys accpt_id: egen x=mode(nhlat)
bys accpt_id: egen y=mode(nhlong)
* fill gaps only; coordinates that are already present are left untouched
replace nhlat=x if mi(nhlat)
replace nhlong=y if mi(nhlong)
drop x y
* variable names are spelled out in full so this does not depend on variable abbreviation
keep accpt_id key_year prov0475 nhlong nhlat // add other nh chars if you want to get nearest X nh but keep in mind that more than one can be nearest
tempfile nhdata
desc
save `nhdata'

* For each year: build one record per nursing home, cross it with every ZIP (in two parts),
* compute the great-circle distance in miles, and save the result sorted by ZIP and distance.
foreach year in 2016 2017 2018  {
	* nursing homes observed in this year, one row per accpt_id, coordinates required
	use `nhdata', clear
	keep if key_year==`year'
	drop key_year
	rename nhlat lat_nh
	rename nhlong lon_nh
	duplicates drop
	count if mi(lat_nh)|mi(lon_nh)
		disp "There are " `r(N)' " records with coordinate missing."
	drop if mi(lat_nh)|mi(lon_nh) // if coordinates missing then :-/ but happens not that much
	
	* report, then force one row per accpt_id; which of several differing rows survives is arbitrary
	duplicates report accpt_id
	duplicates drop accpt_id, force
	isid accpt_id
	tempfile nhs
	save `nhs'
	
	* exact degree-to-radian factor (the rounded constant 0.01745 scales every distance by about 0.02%)
	local d2r = _pi/180
	
	forvalues n=1/2 {
		* every ZIP in this part paired with every nursing home
		use `zips`n'', clear
		cross using `nhs'
		
		* Haversine formula; intermediates are held in double precision and the
		* coordinate variables themselves are left in degrees.
		gen double dis_a = sin((lat_nh-lat_z5)*`d2r'/2)^2 + cos(lat_nh*`d2r') * cos(lat_z5*`d2r') * sin((lon_nh-lon_z5)*`d2r'/2)^2
		gen double dis_c = 2*atan2(sqrt(dis_a),sqrt(1-dis_a))
		* 3958.756 = Earth radius in miles, so dis5 is in miles
		g dis5=dis_c*3958.756
		drop dis_a dis_c
	
		drop if mi(dis5)
		keep hkzip accpt_id dis5 prov0475
		tempfile part`n'
		save `part`n''
		}
		
	use `part1', clear
	append using `part2'
	sort hkzip dis5
	* The nearest-only filter is intentionally left disabled, so the saved file holds
	* every ZIP-nursing home pair ordered by distance within ZIP. To keep just the nearest:
	*by hkzip: keep if _n==1
	
	keep hkzip dis5 prov0475
	gen year=`year'
	di _N
	save "P:\rudpa\shared\holdthendelete\zip_nearestNH_hace_`year'.dta", replace
	}

****

